library(tidyverse)
library(marginaleffects)
library(sandwich)
library(cobalt)
library(tictoc)
Use the code below at your own risk! I primarily wrote this to try to
get my head around what kmatch is doing in Stata.
Sample data:
data(lalonde)
Take a peek:
head(lalonde)
Homebaked kernel matching
First, get the propensity scores:
glm1 <- glm(
treat ~ age + educ + race + married + nodegree + re74 + re75,
data = lalonde,
family = binomial
)
lalonde$ps <- predict(glm1, type = "response")
lalonde %>%
ggplot(aes(x = ps, fill = factor(treat))) +
geom_density(alpha = .4) +
labs(x = "Propensity Scores", y = "Density", fill = "")

The Epanechnikov kernel:
epan <- function(x) {
(3/4)*(1-x^2)*(abs(x)<=1)
}
curve(epan(x), -4, 4, n = 1001)

I’ll want to rescale variables on an arbitrary range to [-1, 1]
rescale <- function(x, in_min, in_max, out_min, out_max) {
out_min + ((x - in_min)*(out_max - out_min)/(in_max - in_min))
}
rescale(seq(-0.3, 0.3, .1), -0.3, 0.3, -1, 1) |> round(2)
[1] -1.00 -0.67 -0.33 0.00 0.33 0.67 1.00
The tofu. This function produces a long dataset with control
observations potentially repeating a large number of times:
kmatch <- function(dat, radius, treat_var, ps_var) {
res <- data.frame()
# Give each observation a unique ID, for the SE calc later
temp <- dat |>
mutate(.id = 1:n())
treats <- temp |>
filter(!!sym(treat_var) == 1)
controls <- temp |>
filter(!!sym(treat_var) == 0) |>
arrange(!!sym(ps_var)) # hopefully speeds up filter
# work through treat obs and find matches
for (r in 1:nrow(treats)) {
cur_row <- treats[r,]
target_ps <- as.numeric(cur_row[ps_var])
# note the strictly less/greater than
matches <- controls |>
filter(!!sym(ps_var) > target_ps - radius &
!!sym(ps_var) < target_ps + radius) |>
mutate(.dist = abs(target_ps - !!sym(ps_var)),
.distnorm = rescale(.dist, -radius, radius, -1, 1),
.epan = epan(.distnorm),
.wt = .epan/sum(.epan)) # weights sum to 1 within a class
if (nrow(matches) >= 1) {
cur_row$.class <- r
cur_row$.wt <- 1 # treat obs gets weight 1
matches$.class <- r
res <- bind_rows(res, cur_row, matches)
}
}
res
}
Run it:
tic()
outdat <-
kmatch(
dat = lalonde,
treat_var = "treat",
radius = 0.03,
ps_var = "ps"
)
toc()
0.53 sec elapsed
nrow(lalonde)
[1] 614
nrow(outdat)
[1] 4696
Now aggregate the weights to make it easier to check for balance.
This yields the same weights as those produced by psmatch2
and kmatch and each control observation appears only
one.
outdat_agg <- outdat |>
group_by(.id) |>
summarise(.sumwt = sum(.wt))
widedat <- lalonde |>
mutate(.id = 1:n()) |>
right_join(outdat_agg)
Joining with `by = join_by(.id)`
Count
All data
lalonde |>
group_by(treat) |>
tally()
Matched data
outdat |>
group_by(treat) |>
summarise(unique_n = unique(.id) |> length(),
rep_n = .id |> length())
Check balance
bal_unadj <- bal.tab(
treat ~ age + educ + race + married + nodegree + re74 + re75,
data = lalonde,
binary = "std",
continuous = "std",
s.d.denom = "pooled"
)
bal_adj <- bal.tab(
treat ~ age + educ + race + married + nodegree + re74 + re75,
data = widedat,
weights = ".sumwt",
binary = "std",
continuous = "std",
s.d.denom = "pooled"
)
summary_tab <- merge(bal_unadj$Balance |> select(-Diff.Adj,-Type),
bal_adj$Balance |> select(-Diff.Un,-Type), by = 0, all = TRUE) |>
rename(Matched = Diff.Adj,
All = Diff.Un,
Variable = Row.names)
summary_tab
summary_tab |>
pivot_longer(cols = -Variable) |>
ggplot(aes(x = abs(value), y = factor(Variable), color = name)) +
geom_point() +
labs(y = "",
x = "|SMD|",
color = "")

Outcome model
The outcome model, as tentatively
suggested by MatchIt authors for another approach using
matching with replacement: “There is some evidence for an alternative
approach that incorporates pair membership and adjusts for reuse of
control units, though this has only been studied for survival
outcomes.”
Note interaction between treat and covariates. This is a
doubly-robust approach. Leave out the covariates if you don’t want
that.
outfit <-
lm(
re78 ~ treat * (age + educ + race + married + nodegree + re74 + re75),
data = outdat,
weights = .wt
)
Now estimate ATT with errors clustered by subclass and ID:
avg_comparisons(
outfit,
variables = "treat",
vcov = ~ .class + .id,
newdata = subset(outdat, treat == 1),
wts = ".wt"
)
Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
Type: response
The following call replicates kmatch (no covariates, no
clustering by subclass):
avg_comparisons(
lm(re78 ~ treat, data = outdat, weights = .wt),
variables = "treat",
vcov = ~ .id,
newdata = subset(outdat, treat == 1),
wts = ".wt"
)
Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
Type: response
LS0tDQp0aXRsZTogIkVwYW5lY2huaWtvdiBrZXJuZWwgbWF0Y2hpbmcgZXhwZXJpbWVudCINCmF1dGhvcjogIkFuZGkgRnVnYXJkIg0KZGF0ZTogIjI3IE1hcmNoIDIwMjMiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogIGh0bWxfbm90ZWJvb2s6DQogICAgY29kZV9mb2xkaW5nOiBub25lDQotLS0NCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShtYXJnaW5hbGVmZmVjdHMpDQpsaWJyYXJ5KHNhbmR3aWNoKQ0KbGlicmFyeShjb2JhbHQpDQpsaWJyYXJ5KHRpY3RvYykNCmBgYA0KDQpVc2UgdGhlIGNvZGUgYmVsb3cgYXQgeW91ciBvd24gcmlzayEgSSBwcmltYXJpbHkgd3JvdGUgdGhpcyB0byB0cnkgdG8gZ2V0IG15IGhlYWQgYXJvdW5kIHdoYXQgYGttYXRjaGAgaXMgZG9pbmcgaW4gU3RhdGEuDQoNClNhbXBsZSBkYXRhOg0KDQpgYGB7cn0NCmRhdGEobGFsb25kZSkNCmBgYA0KDQpUYWtlIGEgcGVlazoNCg0KYGBge3J9DQpoZWFkKGxhbG9uZGUpDQpgYGANCg0KDQojIyBIb21lYmFrZWQga2VybmVsIG1hdGNoaW5nDQoNCkZpcnN0LCBnZXQgdGhlIHByb3BlbnNpdHkgc2NvcmVzOg0KDQpgYGB7cn0NCmdsbTEgPC0gZ2xtKA0KICAgIHRyZWF0IH4gYWdlICsgZWR1YyArIHJhY2UgKyBtYXJyaWVkICsgbm9kZWdyZWUgKyByZTc0ICsgcmU3NSwNCiAgICBkYXRhID0gbGFsb25kZSwNCiAgICBmYW1pbHkgPSBiaW5vbWlhbA0KICApDQpsYWxvbmRlJHBzIDwtIHByZWRpY3QoZ2xtMSwgdHlwZSA9ICJyZXNwb25zZSIpDQpgYGANCg0KDQpgYGB7cn0NCmxhbG9uZGUgJT4lDQogIGdncGxvdChhZXMoeCA9IHBzLCBmaWxsID0gZmFjdG9yKHRyZWF0KSkpICsNCiAgZ2VvbV9kZW5zaXR5KGFscGhhID0gLjQpICsgDQogIGxhYnMoeCA9ICJQcm9wZW5zaXR5IFNjb3JlcyIsIHkgPSAiRGVuc2l0eSIsIGZpbGwgPSAiIikgDQpgYGANCg0KVGhlIEVwYW5lY2huaWtvdiBrZXJuZWw6DQoNCmBgYHtyfQ0KZXBhbiA8LSBmdW5jdGlvbih4KSB7DQogICgzLzQpKigxLXheMikqKGFicyh4KTw9MSkNCn0NCmBgYA0KDQpgYGB7ciBkcGk9MzAwfQ0KY3VydmUoZXBhbih4KSwgLTQsIDQsIG4gPSAxMDAxKQ0KYGBgDQoNCkknbGwgd2FudCB0byByZXNjYWxlIHZhcmlhYmxlcyBvbiBhbiBhcmJpdHJhcnkgcmFuZ2UgdG8gWy0xLCAxXQ0KDQpgYGB7cn0NCnJlc2NhbGUgPC0gZnVuY3Rpb24oeCwgaW5fbWluLCBpbl9tYXgsIG91dF9taW4sIG91dF9tYXgpIHsNCiAgb3V0X21pbiArICgoeCAtIGluX21pbikqKG91dF9tYXggLSBvdXRfbWluKS8oaW5fbWF4IC0gaW5fbWluKSkNCn0NCmBgYA0KDQpgYGB7cn0NCnJlc2NhbGUoc2VxKC0wLjMsIDAuMywgLjEpLCAtMC4zLCAwLjMsIC0xLCAxKSB8PiByb3VuZCgyKQ0KYGBgDQoNClRoZSB0b2Z1LiBUaGlzIGZ1bmN0aW9uIHByb2R1Y2VzIGEgbG9uZyBkYXRhc2V0IHdpdGggY29udHJvbCBvYnNlcnZhdGlvbnMgcG90ZW50aWFsbHkgcmVwZWF0aW5nIGEgbGFyZ2UgbnVtYmVyIG9mIHRpbWVzOg0KDQpgYGB7cn0NCmttYXRjaCA8LSBmdW5jdGlvbihkYXQsIHJhZGl1cywgdHJlYXRfdmFyLCBwc192YXIpIHsNCiAgcmVzIDwtIGRhdGEuZnJhbWUoKQ0KICANCiAgIyBHaXZlIGVhY2ggb2JzZXJ2YXRpb24gYSB1bmlxdWUgSUQsIGZvciB0aGUgU0UgY2FsYyBsYXRlcg0KICB0ZW1wIDwtIGRhdCB8Pg0KICAgIG11dGF0ZSguaWQgPSAxOm4oKSkNCiAgDQogIHRyZWF0cyA8LSB0ZW1wIHw+DQogICAgZmlsdGVyKCEhc3ltKHRyZWF0X3ZhcikgPT0gMSkNCiAgY29udHJvbHMgPC0gdGVtcCB8Pg0KICAgIGZpbHRlcighIXN5bSh0cmVhdF92YXIpID09IDApIHw+DQogICAgYXJyYW5nZSghIXN5bShwc192YXIpKSAjIGhvcGVmdWxseSBzcGVlZHMgdXAgZmlsdGVyDQogIA0KICAjIHdvcmsgdGhyb3VnaCB0cmVhdCBvYnMgYW5kIGZpbmQgbWF0Y2hlcw0KICBmb3IgKHIgaW4gMTpucm93KHRyZWF0cykpIHsNCiAgICBjdXJfcm93IDwtIHRyZWF0c1tyLF0NCiAgICB0YXJnZXRfcHMgPC0gYXMubnVtZXJpYyhjdXJfcm93W3BzX3Zhcl0pDQogICAgDQogICAgIyBub3RlIHRoZSBzdHJpY3RseSBsZXNzL2dyZWF0ZXIgdGhhbg0KICAgIG1hdGNoZXMgPC0gY29udHJvbHMgfD4gDQogICAgICBmaWx0ZXIoISFzeW0ocHNfdmFyKSA+IHRhcmdldF9wcyAtIHJhZGl1cyAmDQogICAgICAgICAgICAgISFzeW0ocHNfdmFyKSA8IHRhcmdldF9wcyArIHJhZGl1cykgfD4NCiAgICAgIG11dGF0ZSguZGlzdCA9IGFicyh0YXJnZXRfcHMgLSAhIXN5bShwc192YXIpKSwNCiAgICAgICAgICAgICAuZGlzdG5vcm0gPSByZXNjYWxlKC5kaXN0LCAtcmFkaXVzLCByYWRpdXMsIC0xLCAxKSwNCiAgICAgICAgICAgICAuZXBhbiA9IGVwYW4oLmRpc3Rub3JtKSwNCiAgICAgICAgICAgICAud3QgPSAuZXBhbi9zdW0oLmVwYW4pKSAjIHdlaWdodHMgc3VtIHRvIDEgd2l0aGluIGEgY2xhc3MNCiAgICANCiAgICBpZiAobnJvdyhtYXRjaGVzKSA+PSAxKSB7DQogICAgICBjdXJfcm93JC5jbGFzcyA8LSByDQogICAgICBjdXJfcm93JC53dCAgICA8LSAxICMgdHJlYXQgb2JzIGdldHMgd2VpZ2h0IDENCiAgICAgIG1hdGNoZXMkLmNsYXNzIDwtIHINCiAgICANCiAgICAgIHJlcyA8LSBiaW5kX3Jvd3MocmVzLCBjdXJfcm93LCBtYXRjaGVzKQ0KICAgIH0NCiAgfQ0KICANCiAgcmVzDQp9DQpgYGANCg0KDQpSdW4gaXQ6DQoNCmBgYHtyfQ0KdGljKCkNCm91dGRhdCA8LQ0KICBrbWF0Y2goDQogICAgZGF0ID0gbGFsb25kZSwNCiAgICB0cmVhdF92YXIgPSAidHJlYXQiLA0KICAgIHJhZGl1cyA9IDAuMDMsDQogICAgcHNfdmFyID0gInBzIg0KICApDQp0b2MoKQ0KYGBgDQoNCmBgYHtyfQ0KbnJvdyhsYWxvbmRlKQ0KbnJvdyhvdXRkYXQpDQpgYGANCg0KTm93IGFnZ3JlZ2F0ZSB0aGUgd2VpZ2h0cyB0byBtYWtlIGl0IGVhc2llciB0byBjaGVjayBmb3IgYmFsYW5jZS4gVGhpcyB5aWVsZHMgdGhlIHNhbWUgd2VpZ2h0cyBhcyB0aG9zZSBwcm9kdWNlZCBieSBgcHNtYXRjaDJgIGFuZCBga21hdGNoYCBhbmQgZWFjaCBjb250cm9sIG9ic2VydmF0aW9uIGFwcGVhcnMgb25seSBvbmUuDQoNCmBgYHtyfQ0Kb3V0ZGF0X2FnZyA8LSBvdXRkYXQgfD4NCiAgZ3JvdXBfYnkoLmlkKSB8Pg0KICBzdW1tYXJpc2UoLnN1bXd0ID0gc3VtKC53dCkpDQoNCndpZGVkYXQgPC0gbGFsb25kZSB8Pg0KICBtdXRhdGUoLmlkID0gMTpuKCkpIHw+DQogIHJpZ2h0X2pvaW4ob3V0ZGF0X2FnZykNCmBgYA0KDQojIENvdW50DQoNCiMjIEFsbCBkYXRhDQoNCmBgYHtyfQ0KbGFsb25kZSB8Pg0KICBncm91cF9ieSh0cmVhdCkgfD4NCiAgdGFsbHkoKQ0KYGBgDQoNCiMjIE1hdGNoZWQgZGF0YQ0KDQpgYGB7cn0NCm91dGRhdCB8Pg0KICBncm91cF9ieSh0cmVhdCkgfD4NCiAgc3VtbWFyaXNlKHVuaXF1ZV9uID0gdW5pcXVlKC5pZCkgfD4gbGVuZ3RoKCksDQogICAgICAgICAgICByZXBfbiA9IC5pZCB8PiBsZW5ndGgoKSkNCmBgYA0KDQoNCiMgQ2hlY2sgYmFsYW5jZQ0KDQpgYGB7cn0NCmJhbF91bmFkaiA8LSBiYWwudGFiKA0KICB0cmVhdCB+IGFnZSArIGVkdWMgKyByYWNlICsgbWFycmllZCArIG5vZGVncmVlICsgcmU3NCArIHJlNzUsDQogIGRhdGEgPSBsYWxvbmRlLA0KICBiaW5hcnkgPSAic3RkIiwNCiAgY29udGludW91cyA9ICJzdGQiLA0KICBzLmQuZGVub20gPSAicG9vbGVkIg0KKQ0KYGBgDQoNCmBgYHtyfQ0KYmFsX2FkaiA8LSBiYWwudGFiKA0KICB0cmVhdCB+IGFnZSArIGVkdWMgKyByYWNlICsgbWFycmllZCArIG5vZGVncmVlICsgcmU3NCArIHJlNzUsDQogIGRhdGEgPSB3aWRlZGF0LA0KICB3ZWlnaHRzID0gIi5zdW13dCIsDQogIGJpbmFyeSA9ICJzdGQiLA0KICBjb250aW51b3VzID0gInN0ZCIsDQogIHMuZC5kZW5vbSA9ICJwb29sZWQiDQopDQpgYGANCg0KYGBge3J9DQpzdW1tYXJ5X3RhYiA8LSBtZXJnZShiYWxfdW5hZGokQmFsYW5jZSB8PiBzZWxlY3QoLURpZmYuQWRqLC1UeXBlKSwNCiAgICAgIGJhbF9hZGokQmFsYW5jZSAgIHw+IHNlbGVjdCgtRGlmZi5VbiwtVHlwZSksIGJ5ID0gMCwgYWxsID0gVFJVRSkgfD4NCiAgcmVuYW1lKE1hdGNoZWQgID0gRGlmZi5BZGosDQogICAgICAgICBBbGwgICAgICA9IERpZmYuVW4sDQogICAgICAgICBWYXJpYWJsZSA9IFJvdy5uYW1lcykNCnN1bW1hcnlfdGFiDQpgYGANCg0KDQpgYGB7ciBkcGk9MzAwfQ0Kc3VtbWFyeV90YWIgfD4NCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSAtVmFyaWFibGUpIHw+DQogIGdncGxvdChhZXMoeCA9IGFicyh2YWx1ZSksIHkgPSBmYWN0b3IoVmFyaWFibGUpLCBjb2xvciA9IG5hbWUpKSArDQogIGdlb21fcG9pbnQoKSArDQogIGxhYnMoeSA9ICIiLA0KICAgICAgIHggPSAifFNNRHwiLA0KICAgICAgIGNvbG9yID0gIiIpDQpgYGANCg0KDQojIE91dGNvbWUgbW9kZWwNCg0KVGhlIG91dGNvbWUgbW9kZWwsIGFzIFt0ZW50YXRpdmVseSBzdWdnZXN0ZWRdKGh0dHBzOi8vY3Jhbi5yLXByb2plY3Qub3JnL3dlYi9wYWNrYWdlcy9NYXRjaEl0L3ZpZ25ldHRlcy9lc3RpbWF0aW5nLWVmZmVjdHMuaHRtbCkgYnkgYE1hdGNoSXRgIGF1dGhvcnMgZm9yIGFub3RoZXIgYXBwcm9hY2ggdXNpbmcgbWF0Y2hpbmcgd2l0aCByZXBsYWNlbWVudDogIlRoZXJlIGlzIHNvbWUgZXZpZGVuY2UgZm9yIGFuIGFsdGVybmF0aXZlIGFwcHJvYWNoIHRoYXQgaW5jb3Jwb3JhdGVzIHBhaXIgbWVtYmVyc2hpcCBhbmQgYWRqdXN0cyBmb3IgcmV1c2Ugb2YgY29udHJvbCB1bml0cywgdGhvdWdoIHRoaXMgaGFzIG9ubHkgYmVlbiBzdHVkaWVkIGZvciBzdXJ2aXZhbCBvdXRjb21lcy4iDQoNCk5vdGUgaW50ZXJhY3Rpb24gYmV0d2VlbiB0cmVhdCBhbmQgY292YXJpYXRlcy4gVGhpcyBpcyBhIGRvdWJseS1yb2J1c3QgYXBwcm9hY2guIExlYXZlIG91dCB0aGUgY292YXJpYXRlcyBpZiB5b3UgZG9uJ3Qgd2FudCB0aGF0Lg0KDQpgYGB7cn0NCm91dGZpdCA8LQ0KICBsbSgNCiAgICByZTc4IH4gdHJlYXQgKiAoYWdlICsgZWR1YyArIHJhY2UgKyBtYXJyaWVkICsgbm9kZWdyZWUgKyByZTc0ICsgcmU3NSksDQogICAgZGF0YSA9IG91dGRhdCwNCiAgICB3ZWlnaHRzID0gLnd0DQogICkNCmBgYA0KDQpOb3cgZXN0aW1hdGUgQVRUIHdpdGggZXJyb3JzIGNsdXN0ZXJlZCBieSBzdWJjbGFzcyBhbmQgSUQ6DQoNCmBgYHtyfQ0KYXZnX2NvbXBhcmlzb25zKA0KICBvdXRmaXQsDQogIHZhcmlhYmxlcyA9ICJ0cmVhdCIsDQogIHZjb3YgPSB+IC5jbGFzcyArIC5pZCwNCiAgbmV3ZGF0YSA9IHN1YnNldChvdXRkYXQsIHRyZWF0ID09IDEpLA0KICB3dHMgPSAiLnd0Ig0KKQ0KYGBgDQoNClRoZSBmb2xsb3dpbmcgY2FsbCByZXBsaWNhdGVzIGBrbWF0Y2hgIChubyBjb3ZhcmlhdGVzLCBubyBjbHVzdGVyaW5nIGJ5IHN1YmNsYXNzKToNCg0KYGBge3J9DQphdmdfY29tcGFyaXNvbnMoDQogIGxtKHJlNzggfiB0cmVhdCwgZGF0YSA9IG91dGRhdCwgd2VpZ2h0cyA9IC53dCksDQogIHZhcmlhYmxlcyA9ICJ0cmVhdCIsDQogIHZjb3YgPSB+IC5pZCwNCiAgbmV3ZGF0YSA9IHN1YnNldChvdXRkYXQsIHRyZWF0ID09IDEpLA0KICB3dHMgPSAiLnd0Ig0KKQ0KYGBgDQoNCg0K